library(sf)
library(geojsonsf)
library(tidyverse)
library(tmap)
Import data from files - NYC NTA Polygons - NYC NTA to tract equivalence - NYS OD
options(scipen = 999)
nyc_nta_geo <- geojson_sf('./data/nyc_nta_2010.geojson')
nyc_tract_nta_equiv <- readxl::read_xlsx('./data/nyc_2010_census_tract_nta_equiv.xlsx')
ny_origin_dest <- read_csv("./data/ny_od_main_JT00_2019.csv")
Define places of interest
boros <- c("Bronx", "Brooklyn", "Manhattan", "Queens")
county_codes <- c("005", "047", "061", "081")
parks <- c("BX10", "BX99", "BK99", "MN99", "QN99")
Create equivalency for counties and tracts with NTAs
county_tract_nta_equiv <- readxl::read_xlsx('./data/nyc_2010_census_tract_nta_equiv.xlsx') %>%
filter(!(nta_code %in% parks)) %>%
mutate(county_tract = str_c(`county_code`, `census_tract`)) %>%
select("county_tract", "nta_code")
Reduce NYS origin destination data to only ntas of interest
nta_ods <- ny_origin_dest %>%
filter(
str_sub(as.character(w_geocode), 3, 5) %in% county_codes &
str_sub(as.character(h_geocode), 3, 5) %in% county_codes
) %>%
mutate(w_county_tract = str_sub(as.character(w_geocode), 3, 11)) %>%
mutate(h_county_tract = str_sub(as.character(h_geocode), 3, 11)) %>%
select(h_county_tract, w_county_tract, S000) %>%
left_join(county_tract_nta_equiv, c("h_county_tract" = "county_tract")) %>%
rename(h_nta_code = nta_code) %>%
left_join(county_tract_nta_equiv, c("w_county_tract" = "county_tract")) %>%
rename(w_nta_code = nta_code) %>%
mutate(od = str_c(h_nta_code, w_nta_code)) %>%
group_by(od) %>%
summarise(
h_nta_code,
w_nta_code,
S000 = sum(S000),
) %>%
unique()
Look exclusively at trips that start and end in the same Borough
intra_borough_nta_ods <- nta_ods %>%
filter(
str_sub(h_nta_code, 1, 2) == str_sub(w_nta_code, 1, 2)
) %>%
mutate(boro = str_sub(h_nta_code, 1, 2))
Reduce NYC geography to NTAs of interest
nta_polys <- nyc_nta_geo %>%
filter(CountyFIPS %in% county_codes) %>%
filter(!(NTACode %in% parks)) %>%
select(BoroName, NTACode, geometry)
Find point on surface
nta_points <- nta_polys %>%
mutate(geometry = st_point_on_surface(geometry))
## Warning in st_point_on_surface.sfc(geometry): st_point_on_surface may not give
## correct results for longitude/latitude data
Define most popular destinations
intra_borough_dests <- intra_borough_nta_ods %>%
group_by(w_nta_code) %>%
summarise(
w_nta_code,
S000 = sum(S000)
) %>%
unique() %>%
left_join(nta_polys, c("w_nta_code"= "NTACode")) %>%
st_as_sf()
Define most popular origins
intra_borough_origins <- intra_borough_nta_ods %>%
group_by(h_nta_code) %>%
summarise(
h_nta_code,
S000 = sum(S000)
) %>%
unique() %>%
left_join(nta_polys, c("h_nta_code" = "NTACode")) %>%
st_as_sf()
Define desire lines for origins and destinations - Origin and Destination NTA must be different
intra_boro_od_lines <- intra_borough_nta_ods %>%
filter(h_nta_code != w_nta_code) %>%
left_join(nta_points, c("h_nta_code" = "NTACode")) %>%
rename(h_geometry = geometry) %>%
left_join(nta_points, c("w_nta_code" = "NTACode")) %>%
rename(w_geometry = geometry) %>%
mutate(geometry = st_union(h_geometry, w_geometry)) %>%
mutate(geometry = st_cast(geometry, "LINESTRING")) %>%
select("od","boro", "S000", "geometry") %>%
st_as_sf()
Define home desire areas - Origin and destination must be the same
intra_nta_desires <- intra_borough_nta_ods %>%
filter(h_nta_code == w_nta_code) %>%
rename(nta_code = h_nta_code) %>%
left_join(nta_polys, c("nta_code" = "NTACode")) %>%
select("od", "boro", "nta_code", "S000", "geometry") %>%
st_as_sf()
mn_origins <- intra_borough_origins %>%
filter(BoroName == "Manhattan")
tm_shape(mn_origins) +
tm_polygons(
col = "S000",
style = "jenks",
title = "Residents"
) +
tm_layout(
title = "Most popular origins Manhattan",
legend.outside = TRUE
)
mn_dests <- intra_borough_dests %>%
filter(BoroName == "Manhattan")
tm_shape(mn_dests) +
tm_polygons(
col = "S000",
style = "jenks",
title = "Employers"
) +
tm_layout(
title = "Most popular destinations in Manhattan",
legend.outside = TRUE
)
mn_outlines <- nta_polys %>%
filter(BoroName == "Manhattan")
mn_od_lines <- intra_boro_od_lines %>%
filter(boro == "MN")
tm_shape(mn_outlines) +
tm_polygons(
col = "BoroName",
title = "Borough",
) +
tm_shape(mn_od_lines) +
tm_lines(
col = "#212121",
lwd = "S000",
) +
tm_layout(
legend.outside = TRUE,
legend.text.size = 0.55,
)
mn_intra_desires <- intra_nta_desires %>%
filter(boro == "MN")
tm_shape(mn_intra_desires) +
tm_polygons(
col = "S000",
title = "Number of workers",
) +
tm_layout(
legend.outside = TRUE,
legend.text.size = 0.55,
)
bx_origins <- intra_borough_origins %>%
filter(BoroName == "Bronx")
tm_shape(bx_origins) +
tm_polygons(
col = "S000",
style = "jenks",
title = "Residents"
) +
tm_layout(
title = "Most popular origins in the Bronx",
legend.outside = TRUE
)
bx_dests <- intra_borough_dests %>%
filter(BoroName == "Bronx")
tm_shape(bx_dests) +
tm_polygons(
col = "S000",
style = "jenks",
title = "Employers"
) +
tm_layout(
title = "Most popular destinations in the Bronx",
legend.outside = TRUE
)
bx_outlines <- nta_polys %>%
filter(BoroName == "Bronx")
bx_od_lines <- intra_boro_od_lines %>%
filter(boro == "BX")
tm_shape(bx_outlines) +
tm_polygons(
col = "BoroName",
title = "Borough",
) +
tm_shape(bx_od_lines) +
tm_lines(
col = "#212121",
lwd = "S000",
) +
tm_layout(
legend.outside = TRUE,
legend.text.size = 0.55,
)
bx_intra_desires <- intra_nta_desires %>%
filter(boro == "BX")
tm_shape(bx_intra_desires) +
tm_polygons(
col = "S000",
title = "Number of workers",
) +
tm_layout(
legend.outside = TRUE,
legend.text.size = 0.55,
)
bn_origins <- intra_borough_origins %>%
filter(BoroName == "Brooklyn")
tm_shape(bn_origins) +
tm_polygons(
col = "S000",
style = "jenks",
title = "Residents"
) +
tm_layout(
title = "Most popular origins in Brooklyn",
legend.outside = TRUE
)
bn_dests <- intra_borough_dests %>%
filter(BoroName == "Brooklyn")
tm_shape(bn_dests) +
tm_polygons(
col = "S000",
style = "jenks",
title = "Employers"
) +
tm_layout(
title = "Most popular destinations in Brooklyn",
legend.outside = TRUE
)
bn_outlines <- nta_polys %>%
filter(BoroName == "Brooklyn")
bn_od_lines <- intra_boro_od_lines %>%
filter(boro == "BK")
tm_shape(bn_outlines) +
tm_polygons(
col = "BoroName",
title = "Borough",
) +
tm_shape(bn_od_lines) +
tm_lines(
col = "#212121",
lwd = "S000",
) +
tm_layout(
legend.outside = TRUE,
legend.text.size = 0.55,
)
bn_intra_desires <- intra_nta_desires %>%
filter(boro == "BK")
tm_shape(bn_intra_desires) +
tm_polygons(
col = "S000",
title = "Number of workers",
) +
tm_layout(
legend.outside = TRUE,
legend.text.size = 0.55,
)
qn_origins <- intra_borough_origins %>%
filter(BoroName == "Queens")
tm_shape(qn_origins) +
tm_polygons(
col = "S000",
style = "jenks",
title = "Residents"
) +
tm_layout(
title = "Most popular origins in Queens",
legend.outside = TRUE
)
qn_dests <- intra_borough_dests %>%
filter(BoroName == "Queens")
tm_shape(qn_dests) +
tm_polygons(
col = "S000",
style = "jenks",
title = "Employers"
) +
tm_layout(
title = "Most popular destinations in Queens",
legend.outside = TRUE
)
qn_outlines <- nta_polys %>%
filter(BoroName == "Queens")
qn_od_lines <- intra_boro_od_lines %>%
filter(boro == "QN")
tm_shape(qn_outlines) +
tm_polygons(
col = "BoroName",
title = "Borough",
) +
tm_shape(qn_od_lines) +
tm_lines(
col = "#212121",
lwd = "S000",
) +
tm_layout(
legend.outside = TRUE,
legend.text.size = 0.5,
)
qn_intra_desires <- intra_nta_desires %>%
filter(boro == "QN")
tm_shape(qn_intra_desires) +
tm_polygons(
col = "S000",
title = "Number of workers",
) +
tm_layout(
legend.outside = TRUE,
legend.text.size = 0.55,
)
sum(filter(intra_borough_nta_ods, boro=="MN")$S000)
## [1] 569012
sum(filter(intra_nta_desires, boro=="MN")$S000)
## [1] 60139
sum(filter(intra_borough_nta_ods, boro=="BX")$S000)
## [1] 129125
sum(filter(intra_nta_desires, boro=="BX")$S000)
## [1] 14406
sum(filter(intra_borough_nta_ods, boro=="BK")$S000)
## [1] 425774
sum(filter(intra_nta_desires, boro=="BK")$S000)
## [1] 55518
sum(filter(intra_borough_nta_ods, boro=="QN")$S000)
## [1] 305396
sum(filter(intra_nta_desires, boro=="QN")$S000)
## [1] 40010